################################################################################
# Created By:Pietryka
# Creation Date:  2016-08-22
# Purpose: Clean CPS non-voting participation data
# Questions: mpietryka@fsu.edu
# Notes: Before running this code, download the data:
#        Current Population Survey, November 2010: Civic Engagement Supplement
#        (ICPSR 32341)
#        https://doi.org/10.3886/ICPSR32341.v2
################################################################################


# PREAMBLE =========================================


# LOAD PACKAGES  -----------------------
library(tidyverse)
library(haven)
library(rio)
library(cem)



# LOAD DATA -----------------------

# (ICPSR 32341)
# https://doi.org/10.3886/ICPSR32341.v2

# Read the tab-delimited ICPSR extract and coerce the imported data frame to a
# tibble. as_tibble() is the coercion function for an existing data frame;
# tibble() is meant for building a tibble from individual columns.
cps_10 <- import("Data/32341-0001-Data.tsv") %>%
  as_tibble()

# VARIABLES OF INTEREST --------------

# Outcome columns (created in the cleaning step below)
outcome_names <- c(
  "discuss_pol", "contact_off", "comm_group", "civic_group", "group_officer"
)

# Treatment indicator column
treatment_name <- "prekkids"

# Control columns: derived variables first, then raw CPS columns that are
# used under their original names
control_names <- c(
  "educ", "famincome", "age", "female", "student",
  "PEMARITL", "PTDTRACE", "PEMLR"
)

# Every column kept for analysis, in output order
all_vars <- c(outcome_names, treatment_name, control_names)

# Variables to exclude from matching (outcomes and treatment)
to_drop <- c(outcome_names, treatment_name)

# Covariates for matching, overall and grouped by measurement level
covariates <- control_names
interval_covariates <- c("age", "famincome", "educ")
binary_covariates <- c("female", "student")
categorical_covariates <- c("PEMARITL", "PTDTRACE", "PEMLR")

# Clean variables  --------------

# Report whether `x` holds nothing but missing values.
# Returns a single logical; a zero-length `x` yields TRUE.
all_na <- function(x) {
  !any(!is.na(x))
}


# Build the analysis variables from the raw CPS columns:
#   1. recode the five participation items (the outcomes),
#   2. flag respondents with no valid answer on any outcome,
#   3. derive the demographic controls and the treatment indicator.
# In every recode, raw codes that are not listed explicitly become NA.
cps_clean_10 <- cps_10 %>%
  # participation battery
  mutate(
    # Reverses codes 1-4 and maps 5 to 0
    # NOTE(review): presumably 5 = "not at all", so higher = more frequent
    # discussion -- confirm against the codebook
    discuss_pol = case_when(
      PEQ2 == 4 ~ 1L,
      PEQ2 == 3 ~ 2L,
      PEQ2 == 2 ~ 3L,
      PEQ2 == 1 ~ 4L,
      PEQ2 == 5 ~ 0L,
      TRUE ~ NA_integer_
    ),
    # Remaining items: code 1 -> 1, code 2 -> 0
    contact_off = case_when(
      PEQ4A == 1 ~ 1L,
      PEQ4A == 2 ~ 0L,
      TRUE ~ NA_integer_
    ),
    comm_group = case_when(
      PEQ5A == 1 ~ 1L,
      PEQ5A == 2 ~ 0L,
      TRUE ~ NA_integer_
    ),
    civic_group = case_when(
      PEQ5B == 1 ~ 1L,
      PEQ5B == 2 ~ 0L,
      TRUE ~ NA_integer_
    ),
    group_officer = case_when(
      PEQ6 == 1 ~ 1L,
      PEQ6 == 2 ~ 0L,
      TRUE ~ NA_integer_
    )
  ) %>%
  # identify people who missed entire battery: TRUE when the row has zero
  # non-missing outcomes (vectorised; no row-wise apply() over a data frame)
  mutate(
    missed_all = rowSums(
      !is.na(dplyr::select(., all_of(outcome_names)))
    ) == 0
  ) %>%
  mutate(
    # -1 is treated as missing for age and education
    age = na_if(PEAGE, -1),
    educ = na_if(PEEDUCA, -1),
    # Non-positive income codes become NA; remaining codes shift down by one
    famincome = if_else(
      HEFAMINC <= 0,
      NA_integer_,
      HEFAMINC - 1L,
      missing = NA_integer_
    ),
    female = case_when(
      PESEX == 2 ~ 1L,
      PESEX == 1 ~ 0L,
      TRUE ~ NA_integer_
    ),
    # Non-students are coded 0 only when PEMLR is valid. At this point PEMLR
    # still carries its raw negative missing codes (they are converted to NA
    # later in this script), so `!is.na()` alone would not exclude them; the
    # sign is checked as well.
    student = case_when(
      PENLFACT == 3 ~ 1L,
      PENLFACT != 3 & !is.na(PEMLR) & PEMLR >= 0 ~ 0L,
      TRUE ~ NA_integer_
    ),
    # Treatment indicator from PRCHLD
    # NOTE(review): the codes mapped to 1 are presumably the categories that
    # include a pre-kindergarten-age child -- confirm against the codebook
    prekkids = case_when(
      PRCHLD %in% c(1, 2, 5:9, 11:15) ~ 1L,
      PRCHLD %in% c(0, 3, 4, 10) ~ 0L,
      TRUE ~ NA_integer_
    )
  )



# SUBSET DATA --------------


# Restrict to the analysis sample, then keep only the analysis columns.
cps_sub_10 <- cps_clean_10 %>%
  filter(
    !missed_all,             # removes those missing on all outcomes
    age >= 18,               # removes under 18
    between(PRNMCHLD, 0, 1)  # omits individuals who had more than one child
  ) %>%
  # all_of() stops with an error if an expected column is absent, whereas
  # one_of() only warned and silently returned fewer columns
  dplyr::select(all_of(all_vars))


# RECODE MISSINGS  ---------------------------

# Replace negative values in `x` with NA.
#
# x: a numeric (integer or double) vector; existing NAs are left as they are.
# Returns a vector of the same length, type and attributes as `x`. Assigning
# NA in place keeps the result type-stable: ifelse(x < 0, NA, x) returns a
# logical vector when every element is negative or when `x` is empty.
to_missing <- function(x) {
  x[!is.na(x) & x < 0] <- NA
  x
}
# Convert negative codes to NA in the three categorical controls, then
# collapse every PTDTRACE code above 5 into a single category, 6.
# across() replaces mutate_at() + funs(); funs() has been deprecated since
# dplyr 0.8.0.
cps_sub_10 <- cps_sub_10 %>%
  mutate(across(c(PEMARITL, PTDTRACE, PEMLR), to_missing)) %>%
  mutate(PTDTRACE = ifelse(PTDTRACE > 5, 6, PTDTRACE))



# Write every object in the current workspace to disk
save.image(file = "Data/CPS-1B-Clean-Participation.Rdata")
